In [54]:
import os
from bs4 import BeautifulSoup
import time
import random

In [9]:
lst = os.listdir('files')

In [11]:
lst[0]


Out[11]:
'11.htm'

In [25]:
new_list = []
for elem in lst:
    text = open('files/'+elem,"r") 
    text = text.read()
    text_soup = BeautifulSoup(text, 'html.parser')

    #making a short list of numbers
    nr_lst = text_soup.find_all('td', {'class': 'tdcol2'})

    #iterating through täm,he list
    for nr in nr_lst:
        nr = nr.text
        new_list.append(nr)

In [27]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.keys import Keys
import time

In [29]:


In [35]:


In [37]:


In [41]:


In [50]:
lst = os.listdir('files2')

In [52]:
len(lst)


Out[52]:
27

In [43]:
!ls 'files2'


test.htm

In [55]:
for elem in new_list[27:]:
    
    driver = webdriver.Firefox()
    search_url = "https://www.shab.ch/shabforms/COMMON/search/searchForm.jsf?MODE=SHAB"
    driver.get(search_url)
    
    time.sleep(1)
    
    driver.find_element_by_id('NOTICE_NR').send_keys(elem)
    driver.find_element_by_class_name('btn').click()
    driver.find_element_by_class_name('prevLnk').click()
    
    text = driver.page_source
    file = open('files2/' + elem + '.htm',"w") 
    file.write(text)
    file.close()
    print(elem)
    driver.quit()


2255565
2230487
2221763
3652497
3641729
3635911
3647203
3629561
---------------------------------------------------------------------------
NoSuchElementException                    Traceback (most recent call last)
<ipython-input-55-a9eff0012728> in <module>()
      9     driver.find_element_by_id('NOTICE_NR').send_keys(elem)
     10     driver.find_element_by_class_name('btn').click()
---> 11     driver.find_element_by_class_name('prevLnk').click()
     12 
     13     text = driver.page_source

~/.virtualenvs/master/lib/python3.5/site-packages/selenium/webdriver/remote/webdriver.py in find_element_by_class_name(self, name)
    431             driver.find_element_by_class_name('foo')
    432         """
--> 433         return self.find_element(by=By.CLASS_NAME, value=name)
    434 
    435     def find_elements_by_class_name(self, name):

~/.virtualenvs/master/lib/python3.5/site-packages/selenium/webdriver/remote/webdriver.py in find_element(self, by, value)
    789         return self.execute(Command.FIND_ELEMENT, {
    790             'using': by,
--> 791             'value': value})['value']
    792 
    793     def find_elements(self, by=By.ID, value=None):

~/.virtualenvs/master/lib/python3.5/site-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
    254         response = self.command_executor.execute(driver_command, params)
    255         if response:
--> 256             self.error_handler.check_response(response)
    257             response['value'] = self._unwrap_value(
    258                 response.get('value', None))

~/.virtualenvs/master/lib/python3.5/site-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
    192         elif exception_class == UnexpectedAlertPresentException and 'alert' in value:
    193             raise exception_class(message, screen, stacktrace, value['alert'].get('text'))
--> 194         raise exception_class(message, screen, stacktrace)
    195 
    196     def _value_or_default(self, obj, key, default):

NoSuchElementException: Message: Unable to locate element: .prevLnk

In [26]:
new_list


Out[26]:
['3622323',
 '3615849',
 '3577217',
 '3615897',
 '3607085',
 '3609623',
 '3609797',
 '3610131',
 '3589305',
 '3603515',
 '2224777',
 '2210701',
 '2213195',
 '2199253',
 '2177343',
 '2183049',
 '2174931',
 '2169461',
 '2140713',
 '2169511',
 '2284001',
 '2266413',
 '2271803',
 '2258493',
 '2255595',
 '2258289',
 '2260821',
 '2255565',
 '2230487',
 '2221763',
 '3652497',
 '3641729',
 '3635911',
 '3647203',
 '3629561',
 '3638473',
 '3638557',
 '3632141',
 '3629259',
 '3619205',
 '3600873',
 '3603607',
 '3594603',
 '3586539',
 '3591689',
 '3589147',
 '3583637',
 '3583611',
 '3577589',
 '3566299',
 '3562779',
 '3566143',
 '3565839',
 '3569269',
 '3565731',
 '3565169',
 '3554113',
 '3551509',
 '3534249',
 '3543185',
 '3463499',
 '3457949',
 '3469851',
 '3457599',
 '3460727',
 '3437273',
 '3440369',
 '3443577',
 '3434159',
 '3440297',
 '3496535',
 '3488623',
 '3483293',
 '3480939',
 '3488351',
 '3475551',
 '3454671',
 '3469575',
 '3457951',
 '3454683',
 '3089675',
 '3074507',
 '3057773',
 '3057639',
 '3043833',
 '3050723',
 '3040867',
 '3036065',
 '3028883',
 '3003041',
 '3540325',
 '3526029',
 '3528343',
 '3520679',
 '3517575',
 '3525871',
 '3515021',
 '3509279',
 '3503417',
 '3509139',
 '3514629',
 '3503591',
 '3506557',
 '3514631',
 '3496317',
 '3496365',
 '3493879',
 '3503331',
 '3493629',
 '3493611',
 '2986819',
 '3002335',
 '2977675',
 '2986985',
 '2970513',
 '2957347',
 '2912793',
 '2895423',
 '2895447',
 '2889801',
 '3892323',
 '3892501',
 '3900829',
 '3900919',
 '3886529',
 '3875013',
 '3869619',
 '3865807',
 '3854979',
 '3845005',
 '3686225',
 '3686113',
 '3685883',
 '3677043',
 '3683809',
 '3683581',
 '3670733',
 '3681571',
 '3684197',
 '3676845',
 '3663313',
 '3672869',
 '3674905',
 '3672577',
 '3660605',
 '3655829',
 '3668261',
 '3652803',
 '3663085',
 '3652585',
 '3894847',
 '3892323',
 '3892501',
 '3900919',
 '3900829',
 '3886529',
 '3875013',
 '3869619',
 '3865807',
 '3854979',
 '3845005',
 '3831469',
 '3832063',
 '3837057',
 '3841973',
 '3842427',
 '3828573',
 '3826331',
 '3821269',
 '3831463',
 '3821465',
 '3818505',
 '3810363',
 '3815843',
 '3807619',
 '3798753',
 '3804671',
 '3796137',
 '3787673',
 '3779413',
 '3716525',
 '3718673',
 '3707011',
 '3711889',
 '3706309',
 '3691257',
 '3691641',
 '3691275',
 '3698463',
 '3688801',
 '2034207',
 '2026327',
 '2029101',
 '2003169',
 '2010235',
 '1989715',
 '1995281',
 '1992453',
 '1963485',
 '1968965',
 '2140687',
 '2143725',
 '2115567',
 '2115791',
 '2099461',
 '2066377',
 '2058599',
 '2061157',
 '2066459',
 '2042271',
 '3743295',
 '3732305',
 '3737493',
 '3742935',
 '3740323',
 '3724519',
 '3718945',
 '3721825',
 '3712047',
 '3721469',
 '1963927',
 '1966505',
 '1960255',
 '1930225',
 '1938641',
 '1938545',
 '1896675',
 '1847383',
 '1893317',
 '1866691',
 '3782321',
 '3782323',
 '3790403',
 '3790055',
 '3774341',
 '3782313',
 '3766221',
 '3773875',
 '3771401',
 '3761215',
 '3761343',
 '3751219',
 '3761331',
 '3753353',
 '3758495',
 '3758237',
 '3755995',
 '3740503',
 '3748469',
 '3748457',
 '1847383',
 '1893317',
 '1866691',
 '1880691',
 '1877137',
 '1872261',
 '1852729',
 '1855601',
 '1847265',
 '3239167',
 '3249375',
 '3229283',
 '3242243',
 '3242253',
 '3232185',
 '3235529',
 '3253307',
 '3214261',
 '3216991',
 '2869751',
 '2876329',
 '2878621',
 '2881325',
 '2849401',
 '2852171',
 '2838175',
 '2827463',
 '2825521',
 '2804345',
 '3442817',
 '3434113',
 '3434419',
 '3419821',
 '3431637',
 '3426041',
 '3411955',
 '3417045',
 '3398027',
 '3412081',
 '3404087',
 '3394719',
 '3400649',
 '3395235',
 '3389303',
 '3389501',
 '3389181',
 '3376383',
 '3362601',
 '3376095',
 '2801669',
 '2791057',
 '2769463',
 '2752787',
 '2764499',
 '2750489',
 '2750357',
 '2747357',
 '2750309',
 '2736221',
 '3205965',
 '3211395',
 '3211391',
 '3211423',
 '3222885',
 '3205139',
 '3194341',
 '3208463',
 '3208351',
 '3202377',
 '2668649',
 '2684099',
 '2645617',
 '2635077',
 '2634951',
 '2643553',
 '2651753',
 '2636053',
 '2629251',
 '2621349',
 '3114783',
 '3129937',
 '3131713',
 '3119765',
 '3127097',
 '3115141',
 '3119823',
 '3100393',
 '3089219',
 '3081677',
 '3196925',
 '3185993',
 '3191787',
 '3185995',
 '3161395',
 '3163739',
 '3151153',
 '3151105',
 '3153597',
 '3136927',
 '2739147',
 '2720073',
 '2719873',
 '2711651',
 '2703423',
 '2701057',
 '2698099',
 '2694119',
 '2691819',
 '2679309',
 '2453841',
 '2445949',
 '2448329',
 '2436163',
 '2411337',
 '2398677',
 '2377013',
 '2376533',
 '2374405',
 '2370163',
 '3306073',
 '3292533',
 '3303571',
 '3303981',
 '3309155',
 '3308873',
 '3303273',
 '3303415',
 '3289793',
 '3292561',
 '3289499',
 '3286263',
 '3283589',
 '3283133',
 '3280319',
 '3283523',
 '3280041',
 '3268003',
 '3277479',
 '3194571',
 '2362571',
 '2362891',
 '2352905',
 '2322825',
 '2316315',
 '2310247',
 '2289561',
 '2291319',
 '2289537',
 '2276635',
 '3344613',
 '3335903',
 '3353691',
 '3336393',
 '3338857',
 '3327407',
 '3321619',
 '3312129',
 '3321465',
 '3315037',
 '2538665',
 '2535583',
 '2513091',
 '2505447',
 '2494789',
 '2490227',
 '2492411',
 '2484521',
 '2468983',
 '2466181',
 '2611937',
 '2618369',
 '2618171',
 '2603189',
 '2600329',
 '2594343',
 '2592019',
 '2551297',
 '2582637',
 '2558007',
 '3362591',
 '3381287',
 '3365105',
 '3365153',
 '3353871',
 '3365123',
 '3364903',
 '3362477',
 '3341851',
 '3333147']

In [23]:


In [24]:
new_list


Out[24]:
['3894847',
 '3892323',
 '3892501',
 '3900919',
 '3900829',
 '3886529',
 '3875013',
 '3869619',
 '3865807',
 '3854979']

In [ ]: